suppressPackageStartupMessages({
## Common
library(tidyverse)
library(magrittr)
library(future.apply)
library(here)
library(AnnotationHub)
library(purrr)
library(scales)
library(kableExtra)
library(tictoc)
library(ggrepel)
library(RColorBrewer)
library(ggpubr)
library(pander)
library(rmarkdown)
## Project specific
library(edgeR)
library(limma)
library(pheatmap)
library(cqn)
library(DT)
library(htmltools)
library(msigdbr)
library(reactable)
library(fgsea)
})
if (interactive()) setwd(here::here())
theme_set(theme_bw())
cores <- availableCores() - 1
source("~/bioinformatics/bioToolkit/lbFuncs.R")
ens_species <- "Mus musculus"
ens_release <- "104"
ens_assembly <- "GRCm39"
ah <- AnnotationHub() %>%
subset(species == ens_species) %>%
subset(rdataclass == "EnsDb")
ahId <- ah$ah_id[str_detect(ah$title, ens_release)]
ensDb <- ah[[ahId]]
chrInfo <- getChromInfoFromEnsembl(ens_assembly, release = ens_release) %>%
dplyr::filter(coord_system == "chromosome")
primary_chrs <- chrInfo$name
transcripts <- transcripts(ensDb, filter = SeqNameFilter(primary_chrs))
txLen <- exonsBy(ensDb, "tx", filter = SeqNameFilter(primary_chrs)) %>%
width() %>%
vapply(sum, integer(1))
mcols(transcripts) <- mcols(transcripts)[
c("tx_id", "gene_id", "gc_content")
] %>%
as.data.frame() %>%
mutate(length = txLen)
geneGc <- transcripts %>%
mcols() %>%
as_tibble() %>%
group_by(gene_id) %>%
summarise(
gc_content = sum(gc_content*length) / sum(length),
length = ceiling(median(length))
)
genes <- genes(ensDb, filter = SeqNameFilter(primary_chrs)) %>%
.[,c("gene_id", "gene_name", "gene_biotype", "entrezid")] %>%
as_tibble() %>%
left_join(geneGc) %>%
GRanges()
An EnsDb object was obtained for Ensembl release 101
with the AnnotationHub package. This contained the
information required to set up gene and transcript annotations.
Gene-level estimates of GC content and length were also generated which
may be required as covariates in suitable analyses.
entrezGenes <- genes %>%
as.data.frame() %>%
dplyr::filter(!is.na(entrezid)) %>%
mutate(entrezid = unAsIs(entrezid)) %>%
unnest(entrezid) %>%
dplyr::rename(entrez_gene = entrezid)
geneChrs <- as_tibble(genes) %>%
dplyr::select(gene_id, chromosome = seqnames)
metadata <- read_tsv(here("misc/SYNAPSE_METADATA_MANIFEST.tsv")) %>%
left_join(read_csv(here("misc/metaAPOE.csv"))) %>%
dplyr::select(
sample = specimenID, species, genotypeBackground, litter, dateBirth,
dateDeath, genotype = Genotype, sex = Sex, age = Age, lane, basename = name,
modelSystemName, individualID, study
) %>%
dplyr::filter(str_detect(sample, "_3M_")) %>%
mutate(basename = str_remove(basename, ".bam_R(1|2).fastq.gz")) %>%
distinct(sample, .keep_all = TRUE) %>%
mutate(
group = as.factor(paste0(genotype, "_", age, "_", sex)),
genotype = as.factor(genotype)
) %>%
dplyr::arrange(genotype, group)
genoCols <- metadata$genotype %>%
unique() %>%
length() %>%
brewer.pal("Set1") %>%
setNames(unique(metadata$genotype))
samples_byGroup <- metadata %>%
split(f = .$group) %>%
sapply(function(x){
pull(x, sample)
}, simplify = FALSE)
dgeList <- readRDS(here("files/dgeList.Rds"))
design <- model.matrix(~0 + group, data = dgeList$samples) %>%
set_colnames(str_remove(colnames(.), "group"))
contrasts <- makeContrasts(
APOE2v3_female = APOE2_3M_female - APOE3_3M_female,
APOE2v3_male = APOE2_3M_male - APOE3_3M_male,
APOE4v3_female = APOE4_3M_female - APOE3_3M_female,
APOE4v3_male = APOE4_3M_male - APOE3_3M_male,
levels = colnames(design)
)
topTables <- readRDS(here("files/topTables_cqn.Rds"))
deGenes <- lapply(topTables, dplyr::filter, DE)
Multiple gene set databases were accessed with the
msigdbr package. For this analysis the Hallmark, KEGG,
Wikipathways and chromosomal based gene sets were chosen.
hm <- msigdbr(ens_species, category = "H") %>%
left_join(entrezGenes) %>%
dplyr::filter(!is.na(gene_id)) %>%
distinct(gs_name, gene_id, .keep_all = TRUE) %>%
mutate(gs_name = str_remove(gs_name, "^HALLMARK_"))
hmByGene <- hm %>%
split(f = .$gene_id) %>%
lapply(extract2, "gs_name")
hmByID <- hm %>%
split(f = .$gs_name) %>%
lapply(extract2, "gene_id")
kg <- msigdbr(ens_species, category = "C2", subcategory = "CP:KEGG") %>%
left_join(entrezGenes) %>%
dplyr::filter(!is.na(gene_id)) %>%
distinct(gs_name, gene_id, .keep_all = TRUE) %>%
mutate(gs_name = str_remove(gs_name, "^KEGG_"))
kgByGene <- kg %>%
split(f = .$gene_id) %>%
lapply(extract2, "gs_name")
kgByID <- kg %>%
split(f = .$gs_name) %>%
lapply(extract2, "gene_id")
wk <- msigdbr(ens_species, category = "C2", subcategory = "CP:WIKIPATHWAYS") %>%
left_join(entrezGenes) %>%
dplyr::filter(!is.na(gene_id)) %>%
distinct(gs_name, gene_id, .keep_all = TRUE) %>%
mutate(gs_name = str_remove(gs_name, "^WP_"))
wkByGene <- wk %>%
split(f = .$gene_id) %>%
lapply(extract2, "gs_name")
wkByID <- wk %>%
split(f = .$gs_name) %>%
lapply(extract2, "gene_id")
hm_fry <- contrasts %>%
apply(MARGIN = 2, function(contrast){
cpm(dgeList$counts, log = TRUE) %>%
fry(
index = hmByID,
design = design,
contrast = contrast,
sort = "mixed"
) %>%
rownames_to_column("Pathway") %>%
as_tibble() %>%
mutate(
Pathway = str_trunc(Pathway, width = 18),
PValue = formatC(PValue, digits = 1, format = "e"),
FDR = formatC(FDR, digits = 1, format = "e"),
PValue.Mixed = formatC(PValue.Mixed, digits = 1, format = "e"),
FDR.Mixed = formatC(FDR.Mixed, digits = 1, format = "e"),
)
})
datatable(hm_fry[[1]])
datatable(hm_fry[[2]])
datatable(hm_fry[[3]])
datatable(hm_fry[[4]])
kg_fry <- contrasts %>%
apply(MARGIN = 2, function(contrast){
cpm(dgeList$counts, log = TRUE) %>%
fry(
index = kgByID,
design = design,
contrast = contrast,
sort = "mixed"
) %>%
rownames_to_column("Pathway") %>%
as_tibble() %>%
mutate(
Pathway = str_trunc(Pathway, width = 18),
PValue = formatC(PValue, digits = 1, format = "e"),
FDR = formatC(FDR, digits = 1, format = "e"),
PValue.Mixed = formatC(PValue.Mixed, digits = 1, format = "e"),
FDR.Mixed = formatC(FDR.Mixed, digits = 1, format = "e"),
)
})
datatable(kg_fry[[1]])
datatable(kg_fry[[2]])
datatable(kg_fry[[3]])
datatable(kg_fry[[4]])
wk_fry <- contrasts %>%
apply(MARGIN = 2, function(contrast){
cpm(dgeList$counts, log = TRUE) %>%
fry(
index = wkByID,
design = design,
contrast = contrast,
sort = "mixed"
) %>%
rownames_to_column("Pathway") %>%
as_tibble() %>%
mutate(
Pathway = str_trunc(Pathway, width = 18),
PValue = formatC(PValue, digits = 1, format = "e"),
FDR = formatC(FDR, digits = 1, format = "e"),
PValue.Mixed = formatC(PValue.Mixed, digits = 1, format = "e"),
FDR.Mixed = formatC(FDR.Mixed, digits = 1, format = "e"),
)
})
datatable(wk_fry[[1]])
datatable(wk_fry[[2]])
datatable(wk_fry[[3]])
datatable(wk_fry[[4]])
dist_winRanges <- readRDS(here("files/dist_winRanges.Rds"))
## Takes ~5 mins to run so .Rds saved for convenience
## Note a few genes are lost that do not fall within a diversity range
genes_diversity_path <- here("files/genes_diversity.Rds")
if (!file.exists(genes_diversity_path)) {
genes_diversity <- lapply(dist_winRanges, function(winRanges){
overlaps <- findOverlaps(GRanges(dgeList$genes), winRanges)
lapply(unique(queryHits(overlaps)), function(query){
subjects <- subjectHits(overlaps)[queryHits(overlaps) == query]
diversity <- winRanges$diversity[subjects] %>%
mean()
dgeList$genes[query,] %>%
mutate(diversity = diversity)
}) %>%
bind_rows() %>%
GRanges()
})
saveRDS(genes_diversity, genes_diversity_path)
} else {
genes_diversity <- readRDS(genes_diversity_path)
}
thresholds <- seq(0.1, 1.0, 0.1)
threshOutcomes <- sapply(colnames(contrasts), function(cont){
topT_div <- as_tibble(genes_diversity[[cont]]) %>%
dplyr::select(gene_id, diversity) %>%
left_join(topTables[[cont]])
n_DE <- topT_div %>%
dplyr::filter(DE) %>%
nrow()
n_notDE <- topT_div %>%
dplyr::filter(!DE) %>%
nrow()
lapply(thresholds, function(threshVal){
topT_div %>%
dplyr::filter(diversity > threshVal) %>%
summarise(
DE_count = sum(DE),
DE_prop = DE_count / n_DE,
notDE_count = sum(!DE),
notDE_prop = notDE_count / n_notDE
) %>%
mutate(threshold = threshVal)
}) %>%
bind_rows()
}, simplify = FALSE)
threshPlots_de <- lapply(threshOutcomes, function(x){
x %>%
pivot_longer(
cols = c(DE_count, DE_prop, notDE_count, notDE_prop),
names_to = c("sig", "type"),
names_sep = "_"
) %>%
pivot_wider(
names_from = "type",
values_from = "value"
) %>%
ggplot(aes(x = threshold, y = prop, group = sig, fill = sig)) +
geom_bar(position = "dodge", stat = "identity", width = 0.08) +
geom_text(
aes(x = threshold, y = prop, label = count),
size = 3,
nudge_y = 0.03,
nudge_x = c(-0.019, 0.019)
) +
scale_x_continuous(breaks = thresholds) +
scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
scale_fill_manual(values = c("grey", "black"), labels = c("DE", "Not DE")) +
theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
coord_cartesian(clip = "off", xlim = c(0.1,1)) +
labs(y = "Proportion of\ngenes removed", x = "Diversity threshold") +
theme(
legend.position = c(1.05, 0.5),
legend.title = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
)
})
hm_fry_eqtl <- colnames(contrasts) %>%
sapply(function(cont){
lapply(thresholds, function(threshVal){
genes_passed <- genes_diversity[[cont]] %>%
as_tibble() %>%
dplyr::filter(diversity <= threshVal) %>%
pull(gene_id)
cpm(
dgeList$counts[genes_passed,],
log = TRUE
) %>%
fry(
index = hmByID,
design = design,
contrast = contrasts[,cont],
sort = "mixed"
) %>%
rownames_to_column("Pathway") %>%
as_tibble() %>%
mutate(
diversity_threshold = threshVal,
rank = row_number(),
sig = factor(
ifelse(FDR.Mixed < 0.05, "DE", "Not DE"),
levels = c("DE", "Not DE")
)
)
})
}, simplify = FALSE)
hm_fry_eqtl_top <- sapply(hm_fry_eqtl, function(contrast){
lapply(contrast, function(threshold){
threshold[1:20,] %>%
pull(Pathway)
}) %>%
unlist() %>%
unique()
}, simplify = FALSE)
rankPlots_hm_fry <- sapply(colnames(contrasts), function(cont){
# plotly::ggplotly({
lapply(hm_fry_eqtl[[cont]], function(threshold){
threshold %>%
dplyr::filter(Pathway %in% hm_fry_eqtl_top[[cont]]) %>%
mutate(rel_rank = row_number())
}) %>%
bind_rows() %>%
mutate(
label = ifelse(
diversity_threshold == 1,
str_trunc(Pathway, width = 30),
NA
)
) %>%
ggplot(aes(
diversity_threshold,
rel_rank,
# -log10(FDR.Mixed),
group = Pathway,
colour = Pathway,
text = sprintf(
paste0(
"Pathway: %s<br>",
"Rank: %i<br>",
"Relative rank: %i<br>",
"FDR (mixed): %e<br>",
"Number of genes: %i<br>",
"Direction: %s"
),
Pathway, rank, rel_rank, FDR.Mixed, NGenes, Direction
)
)) +
geom_line() +
geom_point(aes(shape = sig), size = 4) +
geom_text(aes(label = rank), colour = "black", size = 2.1) +
geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
scale_y_reverse(breaks = seq(1, length(hm_fry_eqtl_top[[cont]]), 1)) +
scale_x_continuous(breaks = thresholds) +
scale_shape_manual(values = c(17, 19)) +
labs(x = "Diversity score cut-off", y = "\nRelative rank") +
theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
coord_cartesian(clip = "off") +
theme(
legend.position = "none",
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
# }, tooltip = "text")
}, simplify = FALSE)
ggarrange(
rankPlots_hm_fry$APOE2v3_female,
NULL,
threshPlots_de$APOE2v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_hm_fry$APOE2v3_male,
NULL,
threshPlots_de$APOE2v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_hm_fry$APOE4v3_female,
NULL,
threshPlots_de$APOE4v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_hm_fry$APOE4v3_male,
NULL,
threshPlots_de$APOE4v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
kg_fry_eqtl <- colnames(contrasts) %>%
sapply(function(cont){
lapply(thresholds, function(threshVal){
genes_passed <- genes_diversity[[cont]] %>%
as_tibble() %>%
dplyr::filter(diversity <= threshVal) %>%
pull(gene_id)
cpm(
dgeList$counts[genes_passed,],
log = TRUE
) %>%
fry(
index = kgByID,
design = design,
contrast = contrasts[,cont],
sort = "mixed"
) %>%
rownames_to_column("Pathway") %>%
as_tibble() %>%
mutate(
diversity_threshold = threshVal,
rank = row_number(),
sig = factor(
ifelse(FDR.Mixed < 0.05, "DE", "Not DE"),
levels = c("DE", "Not DE")
)
)
})
}, simplify = FALSE)
# plotly::ggplotly({
# kg_fry_eqtl$APOE4v3_female %>%
# bind_rows() %>%
# ggplot(aes(diversity_threshold, rank, group = Pathway, colour = Pathway)) +
# geom_line() +
# geom_point() +
# theme(legend.position = "none") +
# scale_y_reverse(breaks = c(1, seq(10, 180, 10))) +
# scale_x_continuous(breaks = seq(0, 1, 0.1)) +
# labs()
# })
kg_fry_eqtl_top <- sapply(kg_fry_eqtl, function(contrast){
lapply(contrast, function(threshold){
threshold[1:20,] %>%
pull(Pathway)
}) %>%
unlist() %>%
unique()
}, simplify = FALSE)
rankPlots_kg_fry <- sapply(colnames(contrasts), function(cont){
# plotly::ggplotly({
lapply(kg_fry_eqtl[[cont]], function(threshold){
threshold %>%
dplyr::filter(Pathway %in% kg_fry_eqtl_top[[cont]]) %>%
mutate(rel_rank = row_number())
}) %>%
bind_rows() %>%
mutate(
label = ifelse(
diversity_threshold == 1,
str_trunc(Pathway, width = 30),
NA
)
) %>%
ggplot(aes(
diversity_threshold,
rel_rank,
# -log10(FDR.Mixed),
group = Pathway,
colour = Pathway,
text = sprintf(
paste0(
"Pathway: %s<br>",
"Rank: %i<br>",
"Relative rank: %i<br>",
"FDR (mixed): %e<br>",
"Number of genes: %i<br>",
"Direction: %s"
),
Pathway, rank, rel_rank, FDR.Mixed, NGenes, Direction
)
)) +
geom_line() +
geom_point(aes(shape = sig), size = 4) +
geom_text(aes(label = rank), colour = "black", size = 2.1) +
geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
scale_y_reverse(breaks = seq(1, length(kg_fry_eqtl_top[[cont]]), 1)) +
scale_x_continuous(breaks = thresholds) +
scale_shape_manual(values = c(17, 19)) +
labs(x = "Diversity score cut-off", y = "\nRelative rank") +
theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
coord_cartesian(clip = "off") +
theme(
legend.position = "none",
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
# }, tooltip = "text")
}, simplify = FALSE)
ggarrange(
rankPlots_kg_fry$APOE2v3_female,
NULL,
threshPlots_de$APOE2v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_kg_fry$APOE2v3_male,
NULL,
threshPlots_de$APOE2v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_kg_fry$APOE4v3_female,
NULL,
threshPlots_de$APOE4v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_kg_fry$APOE4v3_male,
NULL,
threshPlots_de$APOE4v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
wk_fry_eqtl <- colnames(contrasts) %>%
sapply(function(cont){
lapply(thresholds, function(threshVal){
genes_passed <- genes_diversity[[cont]] %>%
as_tibble() %>%
dplyr::filter(diversity <= threshVal) %>%
pull(gene_id)
cpm(
dgeList$counts[genes_passed,],
log = TRUE
) %>%
fry(
index = wkByID,
design = design,
contrast = contrasts[,cont],
sort = "mixed"
) %>%
rownames_to_column("Pathway") %>%
as_tibble() %>%
mutate(
diversity_threshold = threshVal,
rank = row_number(),
sig = factor(
ifelse(FDR.Mixed < 0.05, "DE", "Not DE"),
levels = c("DE", "Not DE")
)
)
})
}, simplify = FALSE)
wk_fry_eqtl_top <- sapply(wk_fry_eqtl, function(contrast){
lapply(contrast, function(threshold){
threshold[1:20,] %>%
pull(Pathway)
}) %>%
unlist() %>%
unique()
}, simplify = FALSE)
rankPlots_wk_fry <- sapply(colnames(contrasts), function(cont){
# plotly::ggplotly({
lapply(wk_fry_eqtl[[cont]], function(threshold){
threshold %>%
dplyr::filter(Pathway %in% wk_fry_eqtl_top[[cont]]) %>%
mutate(rel_rank = row_number())
}) %>%
bind_rows() %>%
mutate(
label = ifelse(
diversity_threshold == 1,
str_trunc(Pathway, width = 30),
NA
)
) %>%
ggplot(aes(
diversity_threshold,
rel_rank,
# -log10(FDR.Mixed),
group = Pathway,
colour = Pathway,
text = sprintf(
paste0(
"Pathway: %s<br>",
"Rank: %i<br>",
"Relative rank: %i<br>",
"FDR (mixed): %e<br>",
"Number of genes: %i<br>",
"Direction: %s"
),
Pathway, rank, rel_rank, FDR.Mixed, NGenes, Direction
)
)) +
geom_line() +
geom_point(aes(shape = sig), size = 4) +
geom_text(aes(label = rank), colour = "black", size = 2.1) +
geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
scale_y_reverse(breaks = seq(1, length(wk_fry_eqtl_top[[cont]]), 1)) +
scale_x_continuous(breaks = thresholds) +
scale_shape_manual(values = c(17, 19)) +
labs(x = "Diversity score cut-off", y = "\nRelative rank") +
theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
coord_cartesian(clip = "off") +
theme(
legend.position = "none",
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
# }, tooltip = "text")
}, simplify = FALSE)
ggarrange(
rankPlots_wk_fry$APOE2v3_female,
NULL,
threshPlots_de$APOE2v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_wk_fry$APOE2v3_male,
NULL,
threshPlots_de$APOE2v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_wk_fry$APOE4v3_female,
NULL,
threshPlots_de$APOE4v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_wk_fry$APOE4v3_male,
NULL,
threshPlots_de$APOE4v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
ranks_d <- lapply(topTables, function(x){
x %>%
mutate(stat = -log10(PValue) * sign(logFC)) %>%
dplyr::arrange(desc(stat)) %>%
with(structure(stat, names = gene_id))
})
ranks_nd <- lapply(topTables, function(x){
x %>%
mutate(stat = -log10(PValue)) %>%
dplyr::arrange(desc(stat)) %>%
with(structure(stat, names = gene_id))
})
hm_gsea_eqtl <- colnames(contrasts) %>%
sapply(function(cont){
lapply(thresholds, function(threshVal){
genes_passed <- genes_diversity[[cont]] %>%
as_tibble() %>%
dplyr::filter(diversity <= threshVal) %>%
pull(gene_id)
fgseaMultilevel(hmByID, ranks_d[[cont]][genes_passed], eps = 0) %>%
as_tibble() %>%
dplyr::rename(FDR = padj, Pathway = pathway) %>%
dplyr::arrange(pval) %>%
mutate(
edgeSize = vapply(leadingEdge, length, numeric(1)),
Direction = case_when(
sign(ES) == 1 ~ "Up",
sign(ES) == -1 ~ "Down",
sign(ES) == 0 ~ "None"
),
diversity_threshold = threshVal,
rank = row_number(),
bonf.p.adj = p.adjust(pval, "bonferroni"),
sig = factor(
ifelse(bonf.p.adj < 0.05, "DE", "Not DE"),
levels = c("DE", "Not DE")
)
)
})
}, simplify = FALSE)
hm_gsea_eqtl_top <- sapply(hm_gsea_eqtl, function(contrast){
lapply(contrast, function(threshold){
threshold[1:20,] %>%
pull(Pathway)
}) %>%
unlist() %>%
unique()
}, simplify = FALSE)
rankPlots_hm_gsea <- sapply(colnames(contrasts), function(cont){
# plotly::ggplotly({
lapply(hm_gsea_eqtl[[cont]], function(threshold){
threshold %>%
dplyr::filter(Pathway %in% hm_gsea_eqtl_top[[cont]]) %>%
mutate(rel_rank = row_number())
}) %>%
bind_rows() %>%
mutate(
label = ifelse(
diversity_threshold == 1,
str_trunc(Pathway, width = 30),
NA
)
) %>%
ggplot(aes(
diversity_threshold,
rel_rank,
# -log10(FDR.Mixed),
group = Pathway,
colour = Pathway,
text = sprintf(
paste0(
"Pathway: %s<br>",
"Rank: %i<br>",
"Relative rank: %i<br>",
"FDR: %e<br>",
"Leading edge size: %i<br>",
"Direction: %s"
),
Pathway, rank, rel_rank, FDR, edgeSize, Direction
)
)) +
geom_line() +
geom_point(aes(shape = sig), size = 4) +
geom_text(aes(label = rank), colour = "black", size = 2.1) +
geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
scale_y_reverse(breaks = seq(1, length(hm_gsea_eqtl_top[[cont]]), 1)) +
scale_x_continuous(breaks = thresholds) +
scale_shape_manual(values = c(17, 19)) +
labs(x = "Diversity score cut-off", y = "\nRelative rank") +
theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
coord_cartesian(clip = "off") +
theme(
legend.position = "none",
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
# }, tooltip = "text")
}, simplify = FALSE)
ggarrange(
rankPlots_hm_gsea$APOE2v3_female,
NULL,
threshPlots_de$APOE2v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_hm_gsea$APOE2v3_male,
NULL,
threshPlots_de$APOE2v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_hm_gsea$APOE4v3_female,
NULL,
threshPlots_de$APOE4v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_hm_gsea$APOE4v3_male,
NULL,
threshPlots_de$APOE4v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
kg_gsea_eqtl <- colnames(contrasts) %>%
sapply(function(cont){
lapply(thresholds, function(threshVal){
genes_passed <- genes_diversity[[cont]] %>%
as_tibble() %>%
dplyr::filter(diversity <= threshVal) %>%
pull(gene_id)
fgseaMultilevel(kgByID, ranks_d[[cont]][genes_passed], eps = 0) %>%
as_tibble() %>%
dplyr::rename(FDR = padj, Pathway = pathway) %>%
dplyr::arrange(pval) %>%
mutate(
edgeSize = vapply(leadingEdge, length, numeric(1)),
Direction = case_when(
sign(ES) == 1 ~ "Up",
sign(ES) == -1 ~ "Down",
sign(ES) == 0 ~ "None"
),
diversity_threshold = threshVal,
rank = row_number(),
bonf.p.adj = p.adjust(pval, "bonferroni"),
sig = factor(
ifelse(bonf.p.adj < 0.05, "DE", "Not DE"),
levels = c("DE", "Not DE")
)
)
})
}, simplify = FALSE)
kg_gsea_eqtl_top <- sapply(kg_gsea_eqtl, function(contrast){
lapply(contrast, function(threshold){
threshold[1:20,] %>%
pull(Pathway)
}) %>%
unlist() %>%
unique()
}, simplify = FALSE)
rankPlots_kg_gsea <- sapply(colnames(contrasts), function(cont){
# plotly::ggplotly({
lapply(kg_gsea_eqtl[[cont]], function(threshold){
threshold %>%
dplyr::filter(Pathway %in% kg_gsea_eqtl_top[[cont]]) %>%
mutate(rel_rank = row_number())
}) %>%
bind_rows() %>%
mutate(
label = ifelse(
diversity_threshold == 1,
str_trunc(Pathway, width = 30),
NA
)
) %>%
ggplot(aes(
diversity_threshold,
rel_rank,
# -log10(FDR.Mixed),
group = Pathway,
colour = Pathway,
text = sprintf(
paste0(
"Pathway: %s<br>",
"Rank: %i<br>",
"Relative rank: %i<br>",
"FDR: %e<br>",
"Leading edge size: %i<br>",
"Direction: %s"
),
Pathway, rank, rel_rank, FDR, edgeSize, Direction
)
)) +
geom_line() +
geom_point(aes(shape = sig), size = 4) +
geom_text(aes(label = rank), colour = "black", size = 2.1) +
geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
scale_y_reverse(breaks = seq(1, length(kg_gsea_eqtl_top[[cont]]), 1)) +
scale_x_continuous(breaks = thresholds) +
scale_shape_manual(values = c(17, 19)) +
labs(x = "Diversity score cut-off", y = "\nRelative rank") +
theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
coord_cartesian(clip = "off") +
theme(
legend.position = "none",
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
# }, tooltip = "text")
}, simplify = FALSE)
ggarrange(
rankPlots_kg_gsea$APOE2v3_female,
NULL,
threshPlots_de$APOE2v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_kg_gsea$APOE2v3_male,
NULL,
threshPlots_de$APOE2v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_kg_gsea$APOE4v3_female,
NULL,
threshPlots_de$APOE4v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_kg_gsea$APOE4v3_male,
NULL,
threshPlots_de$APOE4v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
wk_gsea_eqtl <- colnames(contrasts) %>%
sapply(function(cont){
lapply(thresholds, function(threshVal){
genes_passed <- genes_diversity[[cont]] %>%
as_tibble() %>%
dplyr::filter(diversity <= threshVal) %>%
pull(gene_id)
fgseaMultilevel(wkByID, ranks_d[[cont]][genes_passed], eps = 0) %>%
as_tibble() %>%
dplyr::rename(FDR = padj, Pathway = pathway) %>%
dplyr::arrange(pval) %>%
mutate(
edgeSize = vapply(leadingEdge, length, numeric(1)),
Direction = case_when(
sign(ES) == 1 ~ "Up",
sign(ES) == -1 ~ "Down",
sign(ES) == 0 ~ "None"
),
diversity_threshold = threshVal,
rank = row_number(),
bonf.p.adj = p.adjust(pval, "bonferroni"),
sig = factor(
ifelse(bonf.p.adj < 0.05, "DE", "Not DE"),
levels = c("DE", "Not DE")
)
)
})
}, simplify = FALSE)
wk_gsea_eqtl_top <- sapply(wk_gsea_eqtl, function(contrast){
lapply(contrast, function(threshold){
threshold[1:20,] %>%
pull(Pathway)
}) %>%
unlist() %>%
unique()
}, simplify = FALSE)
rankPlots_wk_gsea <- sapply(colnames(contrasts), function(cont){
# plotly::ggplotly({
lapply(wk_gsea_eqtl[[cont]], function(threshold){
threshold %>%
dplyr::filter(Pathway %in% wk_gsea_eqtl_top[[cont]]) %>%
mutate(rel_rank = row_number())
}) %>%
bind_rows() %>%
mutate(
label = ifelse(
diversity_threshold == 1,
str_trunc(Pathway, width = 30),
NA
)
) %>%
ggplot(aes(
diversity_threshold,
rel_rank,
# -log10(FDR.Mixed),
group = Pathway,
colour = Pathway,
text = sprintf(
paste0(
"Pathway: %s<br>",
"Rank: %i<br>",
"Relative rank: %i<br>",
"FDR: %e<br>",
"Leading edge size: %i<br>",
"Direction: %s"
),
Pathway, rank, rel_rank, FDR, edgeSize, Direction
)
)) +
geom_line() +
geom_point(aes(shape = sig), size = 4) +
geom_text(aes(label = rank), colour = "black", size = 2.1) +
geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
scale_y_reverse(breaks = seq(1, length(wk_gsea_eqtl_top[[cont]]), 1)) +
scale_x_continuous(breaks = thresholds) +
scale_shape_manual(values = c(17, 19)) +
labs(x = "Diversity score cut-off", y = "\nRelative rank") +
theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
coord_cartesian(clip = "off") +
theme(
legend.position = "none",
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
# }, tooltip = "text")
}, simplify = FALSE)
ggarrange(
rankPlots_wk_gsea$APOE2v3_female,
NULL,
threshPlots_de$APOE2v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_wk_gsea$APOE2v3_male,
NULL,
threshPlots_de$APOE2v3_male,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_wk_gsea$APOE4v3_female,
NULL,
threshPlots_de$APOE4v3_female,
ncol = 1,
heights = c(4,0.2,1)
)
ggarrange(
rankPlots_wk_gsea$APOE4v3_male,
NULL,
threshPlots_de$APOE4v3_male,
ncol = 1,
heights = c(4,0.2,1)
)